from bs4 import BeautifulSoup
import requests
import os

class Scheduler:
    def __init__(self, path, root_url, count):
        self.path = path          # file the scraped URLs are appended to
        self.root_url = root_url  # page to start crawling from
        self.count = count        # requested number of URLs (not used yet)
    
    def run_spider(self):
        urls = list()
        if self.root_url is not None:
            try:
                response = requests.get(self.root_url)
                if response.status_code == 200:
                    print("website responded")
                    response.encoding = "utf-8"
                    soup = BeautifulSoup(response.text, 'lxml')
                    # collect the href attribute of every anchor on the page
                    links = soup.select('a')
                    for link in links:
                        url = link.get('href')
                        if url:
                            urls.append(url)
                    print(urls)
                    # append the collected URLs, but only if the output file already exists
                    if os.path.exists(self.path):
                        with open(self.path, 'a', encoding='utf-8') as fp:
                            print("writing data")
                            fp.write(str(urls) + '\n')
            except Exception as e:
                print("error: {0}".format(e))
                

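# A minimal fetch sketch, not part of the original Scheduler: some sites, Baidu
# included, may answer with a verification page when no browser-like User-Agent
# is sent. The header value and timeout below are illustrative assumptions.
def fetch_page(url):
    headers = {"User-Agent": "Mozilla/5.0"}  # assumed browser-like header
    response = requests.get(url, headers=headers, timeout=10)
    response.encoding = "utf-8"
    # return the page body only on a successful response
    return response.text if response.status_code == 200 else None
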
if __name__ == "__main__":
    root_url = "https://www.baidu.com/s?ie=UTF-8&wd=s4%E8%99%9A%E6%8B%9F%E6%9C%BA"
    save_url = "D:\\test.txt"
    spider = Scheduler(save_url, root_url, 20)
    spider.run_spider()
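
# A minimal read-back sketch, assuming the output file written above, where each
# run appends one Python-style list literal per line; ast.literal_eval turns such
# a line back into a list. The function name here is illustrative, not original.
def load_saved_urls(path):
    import ast
    urls = []
    with open(path, 'r', encoding='utf-8') as fp:
        for line in fp:
            line = line.strip()
            if line:
                urls.extend(ast.literal_eval(line))
    return urls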